<?php
defined('BASEPATH') OR exit('No direct script access allowed');

// MVC + Logic: the Logic layer sits between the Controller and the Model.
// TODO: optimize later.
/**
 * Command-line crawler controller.
 *
 * Exposes two long-running workers:
 *  - crawl_links(): walks pages starting from the configured start URL,
 *    pushing every discovered link back onto the link queue and every
 *    movie detail URL onto the movie queue.
 *  - crawl_movie(): consumes the movie queue and stores each movie through
 *    the Movie model.
 *
 * Both workers communicate only through the "redisclient" library.
 */
class Crawl extends CI_Controller{
	/** Queue of URLs waiting to be scanned for further links. */
	const LINK_QUEUE = 'link_queue1';

	/** Queue of movie detail URLs waiting to be parsed and stored. */
	const MOVIE_QUEUE = 'movie_url1';

	/** Set of URLs that have already been scanned by crawl_links(). */
	const VISITED_SET = 'url_set1';

	/** Matches a Douban movie detail URL; the dots are escaped so they only match a literal ".". */
	const MOVIE_URL_PATTERN = '#https://movie\.douban\.com/subject/(\d+)#';

	/** Seconds to wait before polling again when there is nothing to process. */
	const IDLE_SLEEP = 3;

	/**
	 * Refuses any invocation that does not come from the command line.
	 */
	public function __construct(){
		// Only allow this controller to be invoked from the command line.
		if(!is_cli()){
			exit('illegal call!');
		}

		parent::__construct();
	}

	/**
	 * Crawls links starting from the "url_start" item of the "crawl" config.
	 *
	 * Runs until the link queue reports a length of zero. Each iteration
	 * dequeues one URL, skips it if it was already visited, forwards it to the
	 * movie queue when it looks like a movie detail page, enqueues the links
	 * returned by Movie::getAllLinks() and finally marks the URL as visited.
	 *
	 * @return void
	 */
	public function crawl_links(){
		set_time_limit(0); // remove the execution time limit

		$this->config->load('crawl');
		$this->load->library("redisclient");
		$this->load->model('Movie');

		// Seed the queue with the configured start URL.
		$this->redisclient->inQueue(self::LINK_QUEUE, $this->config->item('url_start'));

		while($this->redisclient->getQueueLen(self::LINK_QUEUE)){
			$sUrl = $this->_dequeue_url(self::LINK_QUEUE);

			if(empty($sUrl)){
				// Nothing usable was dequeued (e.g. another worker drained the queue first).
				sleep(self::IDLE_SLEEP);
				continue;
			}

			if($this->redisclient->isInSet(self::VISITED_SET, $sUrl)){
				// Already scanned: no remote request is made, so there is no need to throttle.
				continue;
			}

			// Movie detail pages are handed over to the crawl_movie() worker.
			if(preg_match(self::MOVIE_URL_PATTERN, $sUrl) === 1){
				$this->redisclient->inQueue(self::MOVIE_QUEUE, $sUrl);
			}

			$aUrl = $this->Movie->getAllLinks($sUrl);
			// Guard against a non-iterable result (e.g. false/null on a failed fetch).
			if(is_array($aUrl) || $aUrl instanceof Traversable){
				foreach ($aUrl as $sNewUrl) {
					$this->redisclient->inQueue(self::LINK_QUEUE, $sNewUrl);
				}
			}

			$this->redisclient->addToSet(self::VISITED_SET, $sUrl);

			// Random pause between page visits.
			sleep(mt_rand(3,10));
		}
	}

	/**
	 * Consumes the movie queue forever, storing each movie via the Movie model.
	 *
	 * When no URL is available, or no movie info could be obtained for it, the
	 * worker waits briefly and polls again. A failed insert is logged with
	 * log_warning() and the worker moves on to the next URL.
	 *
	 * @return void
	 */
	public function crawl_movie(){
		set_time_limit(0); // endless worker, same as crawl_links()

		$this->load->model('Movie');
		$this->load->library("redisclient");
		$this->load->helper('common');

		while(true){
			$sUrl = $this->_dequeue_url(self::MOVIE_QUEUE);
			if(empty($sUrl)){
				// Queue is empty: do not call the model with an empty URL.
				sleep(self::IDLE_SLEEP);
				continue;
			}

			$aMovieInfo = $this->Movie->getMovieInfo($sUrl);
			if(empty($aMovieInfo)){
				sleep(self::IDLE_SLEEP);
				continue;
			}

			$insert_id = $this->Movie->addMovie($aMovieInfo);
			if(empty($insert_id)){
				log_warning('添加电影信息失败:' . json_encode($aMovieInfo, JSON_UNESCAPED_UNICODE));
			}

			// Random pause between movie page visits.
			sleep(mt_rand(3,10));
		}
	}

	/**
	 * Pops one entry from the given queue and returns its payload.
	 *
	 * The callers read index 1 of whatever deQueue() returns (presumably a
	 * [key, value] pair - confirm against the redisclient library). This
	 * helper does the same, but yields null instead of raising an
	 * "offset on null/false" notice when nothing was returned.
	 *
	 * @param string $sQueue Name of the queue to pop from.
	 * @return mixed|null The dequeued payload, or null when none is available.
	 */
	private function _dequeue_url($sQueue){
		$aItem = $this->redisclient->deQueue($sQueue);

		return isset($aItem[1]) ? $aItem[1] : null;
	}
}